0.1 Импорт

Импорт phyloseq объекта и библиотек
Датасет - хроносерия разложения соломы - от фактор Day 10ти уровней

library(phyloseq)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggpubr)
library(ampvis2)
library(ANCOMBC)
library(heatmaply)
## Loading required package: plotly
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
## 
## Loading required package: viridis
## Loading required package: viridisLite
## Registered S3 method overwritten by 'dendextend':
##   method     from 
##   rev.hclust vegan
## Registered S3 methods overwritten by 'registry':
##   method               from 
##   print.registry_field proxy
##   print.registry_entry proxy
## Registered S3 method overwritten by 'seriation':
##   method         from 
##   reorder.hclust vegan
## 
## ======================
## Welcome to heatmaply version 1.3.0
## 
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
## 
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## You may ask questions at stackoverflow, use the r and heatmaply tags: 
##   https://stackoverflow.com/questions/tagged/heatmaply
## ======================
library(WGCNA)
## Loading required package: dynamicTreeCut
## Loading required package: fastcluster
## 
## Attaching package: 'fastcluster'
## 
## The following object is masked from 'package:stats':
## 
##     hclust
## 
## 
## 
## Attaching package: 'WGCNA'
## 
## The following object is masked from 'package:stats':
## 
##     cor
ps.f <- readRDS("psf2")

0.2 functions

#phyloseq object to ampvis2 object
#https://gist.github.com/KasperSkytte/8d0ca4206a66be7ff6d76fc4ab8e66c6

phyloseq_to_ampvis2 <- function(physeq) {
  #check object for class
  if(!any(class(physeq) %in% "phyloseq"))
    stop("physeq object must be of class \"phyloseq\"", call. = FALSE)
  
  #ampvis2 requires taxonomy and abundance table, phyloseq checks for the latter
  if(is.null(physeq@tax_table))
    stop("No taxonomy found in the phyloseq object and is required for ampvis2", call. = FALSE)
  
  #OTUs must be in rows, not columns
  if(phyloseq::taxa_are_rows(physeq))
    abund <- as.data.frame(phyloseq::otu_table(physeq)@.Data)
  else
    abund <- as.data.frame(t(phyloseq::otu_table(physeq)@.Data))
  
  #tax_table is assumed to have OTUs in rows too
  tax <- phyloseq::tax_table(physeq)@.Data
  
  #merge by rownames (OTUs)
  otutable <- merge(
    abund,
    tax,
    by = 0,
    all.x = TRUE,
    all.y = FALSE,
    sort = FALSE
  )
  colnames(otutable)[1] <- "OTU"
  
  #extract sample_data (metadata)
  if(!is.null(physeq@sam_data)) {
    metadata <- data.frame(
      phyloseq::sample_data(physeq),
      row.names = phyloseq::sample_names(physeq), 
      stringsAsFactors = FALSE, 
      check.names = FALSE
    )
    
    #check if any columns match exactly with rownames
    #if none matched assume row names are sample identifiers
    samplesCol <- unlist(lapply(metadata, function(x) {
      identical(x, rownames(metadata))}))
    
    if(any(samplesCol)) {
      #error if a column matched and it's not the first
      if(!samplesCol[[1]])
        stop("Sample ID's must be in the first column in the sample metadata, please reorder", call. = FALSE)
    } else {
      #assume rownames are sample identifiers, merge at the end with name "SampleID"
      if(any(colnames(metadata) %in% "SampleID"))
        stop("A column in the sample metadata is already named \"SampleID\" but does not seem to contain sample ID's", call. = FALSE)
      metadata$SampleID <- rownames(metadata)
      
      #reorder columns so SampleID is the first
      metadata <- metadata[, c(which(colnames(metadata) %in% "SampleID"), 1:(ncol(metadata)-1L)), drop = FALSE]
    }
  } else
    metadata <- NULL
  
  #extract phylogenetic tree, assumed to be of class "phylo"
  if(!is.null(physeq@phy_tree)) {
    tree <- phyloseq::phy_tree(physeq)
  } else
    tree <- NULL
  
  #extract OTU DNA sequences, assumed to be of class "XStringSet"
  if(!is.null(physeq@refseq)) {
    #convert XStringSet to DNAbin using a temporary file (easiest)
    fastaTempFile <- tempfile(pattern = "ampvis2_", fileext = ".fa")
    Biostrings::writeXStringSet(physeq@refseq, filepath = fastaTempFile)
  } else
    fastaTempFile <- NULL
  
  #load as normally with amp_load
  ampvis2::amp_load(
    otutable = otutable,
    metadata = metadata,
    tree = tree,
    fasta = fastaTempFile
  )
}

#variance stabilisation from DESeq2

ps_vst <- function(ps, factor){
  
  require(DESeq2)
  require(phyloseq)
  
  diagdds = phyloseq_to_deseq2(ps, as.formula(paste( "~", factor)))              
  diagdds = estimateSizeFactors(diagdds, type="poscounts")
  diagdds = estimateDispersions(diagdds, fitType = "local") 
  pst <- varianceStabilizingTransformation(diagdds)
  pst.dimmed <- t(as.matrix(assay(pst))) 
  # pst.dimmed[pst.dimmed < 0.0] <- 0.0
  ps.varstab <- ps
  otu_table(ps.varstab) <- otu_table(pst.dimmed, taxa_are_rows = FALSE) 
  return(ps.varstab)
}

#WGCNA visualisation
#result - list class object with attributes:
# ps - phyloseq object
# amp - ampwis2 object
# heat - heatmap with absalute read numbers
# heat_rel - hetmap with relative abundances
# tree - phylogenetic tree with taxonomy

color_filt <- function(ps, df){
  
      library(tidyverse)
      library(reshape2)
      library(gridExtra)

  
  l = list()
  for (i in levels(df$module)){
    message(i)
    color_name <-  df %>% 
      filter(module == i) %>% 
      pull(asv) %>% 
      unique()
    ps.col <- prune_taxa(color_name, ps)
    amp.col <- phyloseq_to_amp(ps.col)
    heat <- amp_heatmap(amp.col, tax_show = 60, 
              group_by = "Day", 
              tax_aggregate = "OTU",
              tax_add = "Genus", 
              normalise=FALSE, 
              showRemainingTaxa = TRUE)
    
    ps.rel  <-  phyloseq::transform_sample_counts(ps.col, function(x) x / sum(x) * 100)
    amp.r <- phyloseq_to_ampvis2(ps.rel)
    heat.rel <- amp_heatmap(amp.r, tax_show = 60, 
          group_by = "Day", 
          tax_aggregate = "OTU",
          tax_add = "Genus", 
          normalise=FALSE, 
          showRemainingTaxa = TRUE)
    
    tree <- ps.col@phy_tree
    taxa <- as.data.frame(ps.col@tax_table@.Data)
    p1 <- ggtree(tree) + 
              geom_tiplab(size=2, align=TRUE, linesize=.5) +
              theme_tree2()
    
    taxa[taxa == "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"] <- "Allorhizobium"
    taxa[taxa == "Burkholderia-Caballeronia-Paraburkholderia"] <- "Burkholderia"
  
    tx <- taxa %>% 
      rownames_to_column("id") %>% 
      mutate(id = factor(id, levels = rev(get_taxa_name(p1)))) %>% 
      dplyr::select(-c(Kingdom, Species, Order)) %>% 
      melt(id.var = 'id')

    p2 <- ggplot(tx, aes(variable, id)) + 
      geom_tile(aes(fill = value), alpha = 0.4) +
      geom_text(aes(label = value), size = 2) +
      theme_bw() + 
      theme(legend.position = "none",
             axis.ticks.x=element_blank(),
             axis.text.y=element_blank(),
             axis.ticks.y=element_blank(),
             axis.title.x = element_blank(),
            axis.title.y = element_blank()) 

    p <- ggpubr::ggarrange(p1, p2, widths = c(0.9, 1))
    l[[i]] <- list("ps" = ps.col, 
                   "amp" = amp.col,
                   "heat" = heat,
                   "heat_rel" = heat.rel,
                   "tree" = p)
                    
  }
  return(l)
}

Разделим датасет на две группы

1-я - в более чем 10% образцов должно быть хотя бы 10 ридов - эта группа пойдет в анализ далее

ps.inall <- phyloseq::filter_taxa(ps.f, function(x) sum(x > 10) > (0.1*length(x)), TRUE)
ps.inall
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 338 taxa and 35 samples ]
## sample_data() Sample Data:       [ 35 samples by 2 sample variables ]
## tax_table()   Taxonomy Table:    [ 338 taxa by 7 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 338 tips and 337 internal nodes ]
## refseq()      DNAStringSet:      [ 338 reference sequences ]
amp.inall <- phyloseq_to_ampvis2(ps.inall)
amp_heatmap(amp.inall,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=FALSE,
            showRemainingTaxa = TRUE)

Вторая - оставшиеся, но более 100 ридов по всем образцам(далее - вылетающие мажоры(ВМ))
Остальные филотипы выкидываем из анализа

ps.exl  <- phyloseq::filter_taxa(ps.f, function(x) sum(x > 10) < (0.1*length(x)), TRUE)
ps.exl  <- prune_taxa(taxa_sums(ps.exl) > 100, ps.exl)
amp.exl <- phyloseq_to_ampvis2(ps.exl)
amp_heatmap(amp.exl,
            tax_show = 40,
            group_by = "Day",
            tax_aggregate = "OTU",
            tax_add = "Genus",
            normalise=FALSE,
            showRemainingTaxa = TRUE)

То же, но c относительной представленностью.

ps.per  <-  phyloseq::transform_sample_counts(ps.f, function(x) x / sum(x) * 100) 
ps.exl.taxa <- taxa_names(ps.exl)
ps.per.exl <- prune_taxa(ps.exl.taxa, ps.per)
amp.exl.r <- phyloseq_to_ampvis2(ps.per.exl)

amp_heatmap(amp.exl.r, tax_show = 60, 
      group_by = "Day", 
      tax_aggregate = "OTU",
      tax_add = "Genus", 
      round =  2, 
      normalise=FALSE, 
      showRemainingTaxa = TRUE)

amp_heatmap(amp.exl.r, tax_show = 60, 
      group_by = "Day", 
      tax_aggregate = "OTU",
      tax_add = "Genus", 
      round =  2, 
      normalise=FALSE, )

ВМ объединенные по родам. Относительная представленность.

amp_heatmap(amp.exl.r,
            tax_show = 30, 
            group_by = "Day", 
            tax_aggregate = "Genus",
            tax_add = "Phylum",
            tax_class = "Proteobacteria",
            round =  2,
            normalise=FALSE, 
            showRemainingTaxa = TRUE)

Если посмотреть, если как распределяются ВМ по дням - похоже это случай

phyloseq::sample_sums(ps.per.exl) %>% 
  as.data.frame(col.names = "values") %>% 
  setNames(., nm = "values") %>% 
  rownames_to_column("samples") %>% 
  mutate(Day = sapply(strsplit(samples, "-"), `[`, 3)) %>% 
  ggplot(aes(x=Day, y=values, color=Day)) +
  geom_boxplot(aes(color=Day)) +
  geom_point(color = "black", position = position_dodge(width=0.2)) +
  theme_bw() +
  theme(legend.position = "none")